Data Exploration - Lemonade

1 Import Libraries

In [1]:
import pandas as pd               # DataFrame manipulation
import numpy as np                # Numerical operations
from datetime import datetime     # Date/time manipulation
import matplotlib.pyplot as plt   # Data visualization
from sklearn import preprocessing # Data preprocessing
import re                         # Regular expressions

2 Load and Explore the Data

In [58]:
data_lemon = pd.read_csv('Lemonade.csv')
In [3]:
data_lemon.head() # Show the first rows
Out[3]:
Date Day Temperature Rainfall Flyers Price Sales
0 01/01/2017 Sunday 27.0 2.00 15 0.3 10
1 02/01/2017 Monday 28.9 1.33 15 0.3 13
2 03/01/2017 Tuesday 34.5 1.33 27 0.3 15
3 04/01/2017 Wednesday 44.1 1.05 28 0.3 17
4 05/01/2017 Thursday 42.4 1.00 33 0.3 18

2.1 Data Structure and Summary

In [75]:
data_lemon.info() # Data structure
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 7 columns):
Date           365 non-null object
Day            365 non-null object
Temperature    365 non-null float64
Rainfall       365 non-null float64
Flyers         365 non-null int64
Price          365 non-null float64
Sales          365 non-null int64
dtypes: float64(3), int64(2), object(2)
memory usage: 20.0+ KB
In [73]:
data_lemon.describe() # Summary of statistics
Out[73]:
Temperature Rainfall Flyers Price Sales
count 365.000000 365.000000 365.000000 365.000000 365.000000
mean 0.519718 0.826603 40.284932 0.333973 25.323288
std 0.184468 0.273171 13.178651 0.075206 6.893589
min 0.000000 0.470000 9.000000 0.300000 7.000000
25% 0.394077 0.650000 31.000000 0.300000 20.000000
50% 0.523918 0.740000 39.000000 0.300000 25.000000
75% 0.640091 0.910000 49.000000 0.300000 30.000000
max 1.000000 2.500000 80.000000 0.500000 43.000000
In [79]:
# Summary statistics for each day
data_lemon.groupby(['Day'],as_index = False).describe()
Out[79]:
Temperature Rainfall ... Price Sales
count mean std min 25% 50% 75% max count mean ... 75% max count mean std min 25% 50% 75% max
0 52.0 0.524312 0.184333 0.116173 0.400057 0.528474 0.638667 0.958998 52.0 0.817115 ... 0.3 0.5 52.0 25.384615 7.010227 11.0 21.00 25.5 29.25 41.0
1 52.0 0.521509 0.194984 0.157175 0.389806 0.518793 0.641230 0.996583 52.0 0.823462 ... 0.3 0.5 52.0 25.461538 7.127581 12.0 20.75 25.0 30.00 42.0
2 52.0 0.521991 0.191609 0.179954 0.382688 0.527904 0.625285 1.000000 52.0 0.823077 ... 0.3 0.5 52.0 25.346154 7.221658 13.0 19.75 25.0 30.00 43.0
3 53.0 0.508768 0.194853 0.000000 0.394077 0.523918 0.644647 0.891800 53.0 0.873585 ... 0.3 0.5 53.0 24.830189 7.394738 7.0 19.00 25.0 29.00 38.0
4 52.0 0.524203 0.178309 0.191344 0.406036 0.515376 0.641230 0.943052 52.0 0.812500 ... 0.3 0.5 52.0 25.673077 6.729278 13.0 21.00 25.0 30.00 43.0
5 52.0 0.510754 0.187908 0.078588 0.416002 0.522210 0.648633 0.958998 52.0 0.829423 ... 0.3 0.5 52.0 25.134615 6.709750 10.0 21.00 25.0 30.00 41.0
6 52.0 0.526700 0.167437 0.194761 0.413724 0.534169 0.638667 0.902050 52.0 0.806154 ... 0.3 0.5 52.0 25.442308 6.369084 12.0 20.00 25.0 30.00 41.0

7 rows × 40 columns

2.2 Column Type Manipulation - Date

In [6]:
data_lemon['Date'].head()
Out[6]:
0    01/01/2017
1    02/01/2017
2    03/01/2017
3    04/01/2017
4    05/01/2017
Name: Date, dtype: object
In [7]:
data_lemon['Date'] =  pd.to_datetime(data_lemon['Date'],format = '%d/%m/%Y')
In [8]:
data_lemon['Date'].head()
Out[8]:
0   2017-01-01
1   2017-01-02
2   2017-01-03
3   2017-01-04
4   2017-01-05
Name: Date, dtype: datetime64[ns]
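
If some of the raw date strings were ever malformed, pd.to_datetime can coerce them to NaT instead of raising, and dayfirst=True is an alternative to spelling out the format. A small optional check, not written back to the dataframe:

# Optional check: dayfirst=True parses dd/mm/yyyy without an explicit format,
# and errors='coerce' turns unparseable strings into NaT instead of raising
parsed = pd.to_datetime(data_lemon['Date'], dayfirst = True, errors = 'coerce')
print(parsed.isnull().sum()) # number of dates that failed to parse (expected 0)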

2.3 Column Name Manipulation

In [9]:
print(data_lemon.columns.values) # Column names
['Date' 'Day' 'Temperature' 'Rainfall' 'Flyers' 'Price' 'Sales']
In [10]:
data_lemon.columns = ['Tanggal','Hari','Suhu','Intensitas Hujan','Leaflet','Harga','Penjualan']
In [11]:
data_lemon.rename(columns = {'Intensitas Hujan':'Rainfall'},inplace = True)
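
rename also accepts a dict with several old-to-new pairs in one call. A non-destructive sketch: the result is not assigned back, and the English names here are only for illustration.

# Rename several columns at once; without inplace=True or re-assignment
# the original dataframe is left untouched
data_lemon.rename(columns = {'Leaflet':'Flyers','Harga':'Price'}).head()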

2.4 Make a New Column

In [12]:
data_lemon['Pendapatan'] = data_lemon['Harga'] * data_lemon['Penjualan'] # New column 'Pendapatan' (revenue) = price * sales
In [13]:
data_lemon[['Pendapatan', 'Harga', 'Penjualan']].sample(n = 5)
Out[13]:
Pendapatan Harga Penjualan
146 9.3 0.3 31
114 8.1 0.3 27
172 9.3 0.3 31
222 15.0 0.5 30
341 4.5 0.3 15
In [14]:
data_lemon.head() # Show the first rows
Out[14]:
Tanggal Hari Suhu Rainfall Leaflet Harga Penjualan Pendapatan
0 2017-01-01 Sunday 27.0 2.00 15 0.3 10 3.0
1 2017-01-02 Monday 28.9 1.33 15 0.3 13 3.9
2 2017-01-03 Tuesday 34.5 1.33 27 0.3 15 4.5
3 2017-01-04 Wednesday 44.1 1.05 28 0.3 17 5.1
4 2017-01-05 Thursday 42.4 1.00 33 0.3 18 5.4
In [15]:
# Create a 'Bulan' (month) column from 'Tanggal' and insert it at a specific position
new_col = data_lemon['Tanggal'].dt.month
data_lemon.insert(loc = 1, column = 'Bulan', value = new_col)
In [16]:
data_lemon.head() # Show the first rows
Out[16]:
Tanggal Bulan Hari Suhu Rainfall Leaflet Harga Penjualan Pendapatan
0 2017-01-01 1 Sunday 27.0 2.00 15 0.3 10 3.0
1 2017-01-02 1 Monday 28.9 1.33 15 0.3 13 3.9
2 2017-01-03 1 Tuesday 34.5 1.33 27 0.3 15 4.5
3 2017-01-04 1 Wednesday 44.1 1.05 28 0.3 17 5.1
4 2017-01-05 1 Thursday 42.4 1.00 33 0.3 18 5.4

2.5 Change Categorical Values in a Column

In [17]:
cats_month = {1:'January',2:'February',3:'March',4:'April',5:'May',6:'June',
              7:'July',8:'August',9:'September',10:'October',11:'November',12:'December'}
data_lemon['Bulan'] = data_lemon['Bulan'].replace(cats_month) # Map month numbers to month names
In [18]:
print(data_lemon['Bulan'].unique())
['January' 'February' 'March' 'April' 'May' 'June' 'July' 'August'
 'September' 'October' 'November' 'December']
In [19]:
data_lemon.head() # Show the first rows
Out[19]:
Tanggal Bulan Hari Suhu Rainfall Leaflet Harga Penjualan Pendapatan
0 2017-01-01 January Sunday 27.0 2.00 15 0.3 10 3.0
1 2017-01-02 January Monday 28.9 1.33 15 0.3 13 3.9
2 2017-01-03 January Tuesday 34.5 1.33 27 0.3 15 4.5
3 2017-01-04 January Wednesday 44.1 1.05 28 0.3 17 5.1
4 2017-01-05 January Thursday 42.4 1.00 33 0.3 18 5.4
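
In recent pandas versions the month names can also be read straight off the datetime column with the .dt accessor, which avoids maintaining the cats_month mapping by hand. A quick check (English names by default):

# Month names taken directly from the datetime column
print(data_lemon['Tanggal'].dt.month_name().unique())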

2.6 Unique Values in Each Column

In [20]:
print('Number of unique values:',data_lemon['Hari'].nunique()) # Number of unique values
print('Unique values of Hari: \n', data_lemon['Hari'].unique()) # Unique values in column 'Hari'
Number of unique values: 7
Unique values of Hari: 
 ['Sunday' 'Monday' 'Tuesday' 'Wednesday' 'Thursday' 'Friday' 'Saturday']

2.7 Check for Missing Values

In [21]:
for i in data_lemon.columns.values: # Count missing values in each column
    print(i,':',data_lemon[i].isnull().sum())
Tanggal : 0
Bulan : 0
Hari : 0
Suhu : 0
Rainfall : 0
Leaflet : 0
Harga : 0
Penjualan : 0
Pendapatan : 0
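
The same per-column counts come from a single vectorized call, which is the more idiomatic pandas way to check for missing values:

# One call instead of a loop: number of missing values in each column
print(data_lemon.isnull().sum())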

3 Aggregating Data

In [22]:
data_agg = data_lemon.copy() # Copy the data

3.1 Aggregating Data by Day

In [23]:
data_agg.groupby(['Hari'],as_index = False)[['Penjualan','Pendapatan']].agg('sum')
Out[23]:
Hari Penjualan Pendapatan
0 Friday 1320 448.8
1 Monday 1324 457.0
2 Saturday 1318 457.8
3 Sunday 1316 454.0
4 Thursday 1335 460.9
5 Tuesday 1307 451.1
6 Wednesday 1323 454.1
In [24]:
# Column-specific aggregation functions
df_agg = data_agg.groupby(['Hari'],as_index = False).agg({'Suhu':'mean','Rainfall':'mean','Leaflet':'sum',
                                                          'Harga':'mean','Penjualan':'sum','Pendapatan':'sum'})
df_agg
Out[24]:
Hari Suhu Rainfall Leaflet Harga Penjualan Pendapatan
0 Friday 61.134615 0.817115 2097 0.330769 1320 448.8
1 Monday 60.888462 0.823462 2069 0.334615 1324 457.0
2 Saturday 60.930769 0.823077 1997 0.334615 1318 457.8
3 Sunday 59.769811 0.873585 2137 0.333962 1316 454.0
4 Thursday 61.125000 0.812500 2117 0.334615 1335 460.9
5 Tuesday 59.944231 0.829423 2135 0.334615 1307 451.1
6 Wednesday 61.344231 0.806154 2152 0.334615 1323 454.1
In [25]:
# Sort rows in weekday order using an ordered categorical
cats = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
df_agg['Hari'] = pd.Categorical(df_agg['Hari'],categories = cats,ordered = True)
df_agg.sort_values('Hari',ascending = True)
Out[25]:
Hari Suhu Rainfall Leaflet Harga Penjualan Pendapatan
1 Monday 60.888462 0.823462 2069 0.334615 1324 457.0
5 Tuesday 59.944231 0.829423 2135 0.334615 1307 451.1
6 Wednesday 61.344231 0.806154 2152 0.334615 1323 454.1
4 Thursday 61.125000 0.812500 2117 0.334615 1335 460.9
0 Friday 61.134615 0.817115 2097 0.330769 1320 448.8
2 Saturday 60.930769 0.823077 1997 0.334615 1318 457.8
3 Sunday 59.769811 0.873585 2137 0.333962 1316 454.0
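
An alternative to building an ordered Categorical is to group on the weekday number taken from the original dates, which comes back already ordered Monday to Sunday. A small sketch:

# Group by weekday number (Monday = 0, Sunday = 6); the index is already in order
data_agg.groupby(data_agg['Tanggal'].dt.dayofweek)[['Penjualan','Pendapatan']].sum()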

3.2 Aggregating Data by Month

In [26]:
data_agg.groupby(['Bulan'],as_index = False)[['Penjualan','Pendapatan']].agg('sum')
Out[26]:
Bulan Penjualan Pendapatan
0 April 786 235.8
1 August 941 470.5
2 December 462 138.6
3 February 557 167.1
4 January 462 138.6
5 July 1113 556.5
6 June 1056 316.8
7 March 742 222.6
8 May 915 274.5
9 November 632 189.6
10 October 765 229.5
11 September 812 243.6
In [27]:
# Recompute the day-level aggregation, then reorder rows into weekday order by position
df_agg = data_agg.groupby(['Hari'],as_index = False).agg({'Suhu':'mean','Rainfall':'mean','Leaflet':'sum',
                                                          'Harga':'mean','Penjualan':'sum','Pendapatan':'sum'})
df_agg.reindex([1,5,6,4,0,2,3]) # reindex_axis was removed in newer pandas; reindex does the same here
Out[27]:
Hari Suhu Rainfall Leaflet Harga Penjualan Pendapatan
1 Monday 60.888462 0.823462 2069 0.334615 1324 457.0
5 Tuesday 59.944231 0.829423 2135 0.334615 1307 451.1
6 Wednesday 61.344231 0.806154 2152 0.334615 1323 454.1
4 Thursday 61.125000 0.812500 2117 0.334615 1335 460.9
0 Friday 61.134615 0.817115 2097 0.330769 1320 448.8
2 Saturday 60.930769 0.823077 1997 0.334615 1318 457.8
3 Sunday 59.769811 0.873585 2137 0.333962 1316 454.0

4 Cross Tabulation

In [28]:
df_cross = pd.crosstab(data_lemon.Bulan,data_lemon.Hari) # Add margins=True to show totals
df_cross
Out[28]:
Hari Friday Monday Saturday Sunday Thursday Tuesday Wednesday
Bulan
April 4 4 5 5 4 4 4
August 4 4 4 4 5 5 5
December 5 4 5 5 4 4 4
February 4 4 4 4 4 4 4
January 4 5 4 5 4 5 4
July 4 5 5 5 4 4 4
June 5 4 4 4 5 4 4
March 5 4 4 4 5 4 5
May 4 5 4 4 4 5 5
November 4 4 4 4 5 4 5
October 4 5 4 5 4 5 4
September 5 4 5 4 4 4 4
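
As noted in the comment above, passing margins=True appends an 'All' row and column holding the totals. A quick sketch:

# Cross tabulation with totals in an extra 'All' row and column
pd.crosstab(data_lemon.Bulan, data_lemon.Hari, margins = True).tail()
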
In [29]:
print(df_cross.columns.values) # Column names
['Friday' 'Monday' 'Saturday' 'Sunday' 'Thursday' 'Tuesday' 'Wednesday']
In [30]:
df_cross.sort_index(inplace = True)
# Reorder the columns by weekday (cats) and the rows by calendar month (cats_month)
df_cross[cats].reindex(list(cats_month.values()))
Out[30]:
Hari Monday Tuesday Wednesday Thursday Friday Saturday Sunday
Bulan
January 5 5 4 4 4 4 5
February 4 4 4 4 4 4 4
March 4 4 5 5 5 4 4
April 4 4 4 4 4 5 5
May 5 5 5 4 4 4 4
June 4 4 4 5 5 4 4
July 5 4 4 4 4 5 5
August 4 5 5 5 4 4 4
September 4 4 4 4 5 5 4
October 5 5 4 4 4 4 5
November 4 4 5 5 4 4 4
December 4 4 4 4 5 5 5

5 Preprocessing - Standardization

5.1 Min-Max Scaler

In [64]:
# Create a duplicate of the data for standardization
data_std = data_lemon.copy()
print(data_std.columns.values) # Column names
['Date' 'Day' 'Temperature' 'Rainfall' 'Flyers' 'Price' 'Sales']
In [31]:
# Create a MinMaxScaler object
min_max_scaler = preprocessing.MinMaxScaler()
In [65]:
# Convert the column values to floats
data_std['Temperature'] = data_std['Temperature'].values.astype(float)
In [67]:
# fit_transform expects 2D input, hence the double brackets
data_std[['Temperature']] = min_max_scaler.fit_transform(data_std[['Temperature']])
In [68]:
data_std['Temperature'].describe() # Summary of statistics
Out[68]:
count    365.000000
mean       0.519718
std        0.184468
min        0.000000
25%        0.394077
50%        0.523918
75%        0.640091
max        1.000000
Name: Temperature, dtype: float64
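
MinMaxScaler is equivalent to the closed-form rescaling (x - min) / (max - min). A quick check against the raw CSV column, so the result does not depend on which notebook cells have already modified the dataframes:

# Manual min-max rescaling; should reproduce the MinMaxScaler summary above
raw_temp = pd.read_csv('Lemonade.csv')['Temperature'].astype(float)
manual = (raw_temp - raw_temp.min()) / (raw_temp.max() - raw_temp.min())
print(manual.min(), manual.max()) # 0.0 and 1.0 by construction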

5.2 Normalization

In [69]:
# Z-score standardization: subtract the mean, divide by the standard deviation
data_std['Rainfall'] = (data_std['Rainfall'] - data_std['Rainfall'].mean())/data_std['Rainfall'].std()
In [70]:
data_std['Rainfall'].describe() # Summary of statistics
Out[70]:
count    3.650000e+02
mean    -2.355498e-15
std      1.000000e+00
min     -1.305419e+00
25%     -6.464911e-01
50%     -3.170274e-01
75%      3.052930e-01
max      6.125819e+00
Name: Rainfall, dtype: float64
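
The same z-score can be produced with sklearn's StandardScaler. Note that it expects 2D input and divides by the population standard deviation (ddof=0), while the pandas .std() used above defaults to ddof=1, so the two results differ very slightly:

# StandardScaler version of the z-score, again on the raw CSV column
raw_rain = pd.read_csv('Lemonade.csv')[['Rainfall']].astype(float)
scaled = preprocessing.StandardScaler().fit_transform(raw_rain)
print(scaled.mean(), scaled.std()) # approximately 0 and 1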

6 Slicing, Filtering, and Subsetting Data

In [35]:
data_filter = data_lemon.copy()
data_filter.head()
Out[35]:
Tanggal Bulan Hari Suhu Rainfall Leaflet Harga Penjualan Pendapatan
0 2017-01-01 January Sunday 0.135535 2.00 15 0.3 10 3.0
1 2017-01-02 January Monday 0.157175 1.33 15 0.3 13 3.9
2 2017-01-03 January Tuesday 0.220957 1.33 27 0.3 15 4.5
3 2017-01-04 January Wednesday 0.330296 1.05 28 0.3 17 5.1
4 2017-01-05 January Thursday 0.310934 1.00 33 0.3 18 5.4
In [36]:
data_filter.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 9 columns):
Tanggal       365 non-null datetime64[ns]
Bulan         365 non-null object
Hari          365 non-null object
Suhu          365 non-null float64
Rainfall      365 non-null float64
Leaflet       365 non-null int64
Harga         365 non-null float64
Penjualan     365 non-null int64
Pendapatan    365 non-null float64
dtypes: datetime64[ns](1), float64(4), int64(2), object(2)
memory usage: 25.7+ KB

6.1 Set Date Column as Index

In [37]:
data_filter.set_index('Tanggal',inplace = True) # inplace=True sets the index permanently on data_filter
In [38]:
data_filter.head()
Out[38]:
Bulan Hari Suhu Rainfall Leaflet Harga Penjualan Pendapatan
Tanggal
2017-01-01 January Sunday 0.135535 2.00 15 0.3 10 3.0
2017-01-02 January Monday 0.157175 1.33 15 0.3 13 3.9
2017-01-03 January Tuesday 0.220957 1.33 27 0.3 15 4.5
2017-01-04 January Wednesday 0.330296 1.05 28 0.3 17 5.1
2017-01-05 January Thursday 0.310934 1.00 33 0.3 18 5.4

6.2 Column Selection

In [39]:
data_filter[['Bulan','Hari','Suhu','Penjualan']].head()
Out[39]:
Bulan Hari Suhu Penjualan
Tanggal
2017-01-01 January Sunday 0.135535 10
2017-01-02 January Monday 0.157175 13
2017-01-03 January Tuesday 0.220957 15
2017-01-04 January Wednesday 0.330296 17
2017-01-05 January Thursday 0.310934 18

6.3 loc and iloc for Selecting Data

In [40]:
data_filter.loc['2017-01-01'] # The label can also be a list of index labels
Out[40]:
Bulan          January
Hari            Sunday
Suhu          0.135535
Rainfall             2
Leaflet             15
Harga              0.3
Penjualan           10
Pendapatan           3
Name: 2017-01-01 00:00:00, dtype: object
In [41]:
data_filter.loc['2017-01-01',['Hari','Leaflet','Penjualan','Pendapatan']]
Out[41]:
Hari          Sunday
Leaflet           15
Penjualan         10
Pendapatan         3
Name: 2017-01-01 00:00:00, dtype: object
In [42]:
data_filter.iloc[[1,2,3,4,5,6],[1,4,6,7]] # Positional (integer-based) selection of rows and columns
Out[42]:
Hari Leaflet Penjualan Pendapatan
Tanggal
2017-01-02 Monday 15 13 3.9
2017-01-03 Tuesday 27 15 4.5
2017-01-04 Wednesday 28 17 5.1
2017-01-05 Thursday 33 18 5.4
2017-01-06 Friday 23 11 3.3
2017-01-07 Saturday 19 13 3.9
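
Because the index is now a DatetimeIndex, loc also accepts label slices, which is often more convenient than positional iloc for picking a date range. A quick sketch:

# Label-based slice over the DatetimeIndex (both endpoints inclusive)
data_filter.loc['2017-01-02':'2017-01-07', ['Hari','Leaflet','Penjualan','Pendapatan']]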

6.4 Filtering the DataFrame

6.4.1 Sample 1 - One Condition

In [43]:
# Our filter
monday_index = data_filter.Hari == 'Monday'
In [44]:
# Print the type and length of the variable 'monday_index'
print('Data type: ',type(monday_index),'\n Length of variable: ',len(monday_index))
Data type:  <class 'pandas.core.series.Series'> 
 Length of variable:  365
In [45]:
data_filter[monday_index].head()
Out[45]:
Bulan Hari Suhu Rainfall Leaflet Harga Penjualan Pendapatan
Tanggal
2017-01-02 January Monday 0.157175 1.33 15 0.3 13 3.9
2017-01-09 January Monday 0.261959 1.18 20 0.3 17 5.1
2017-01-16 January Monday 0.176538 1.67 24 0.3 12 3.6
2017-01-23 January Monday 0.261959 1.05 21 0.3 17 5.1
2017-01-30 January Monday 0.296128 1.05 20 0.3 17 5.1
In [46]:
data_filter[monday_index]['Hari'].unique() # Confirm the filter keeps only the rows we want
Out[46]:
array(['Monday'], dtype=object)

6.4.2 Sample 2 - Multiple Conditions

In [47]:
# Our filter
monday_index = data_filter.Hari == 'Monday'
sales_index = data_filter.Penjualan > 15
In [48]:
# Operator & for 'and', | for 'or', ~ for 'not'
monday_sales_index = monday_index & sales_index
In [49]:
data_monday_sales = data_filter[monday_sales_index]
In [50]:
print(data_monday_sales['Penjualan'].describe())
print('Unique Day: ',data_monday_sales['Hari'].unique())
count    47.000000
mean     26.765957
std       6.175734
min      17.000000
25%      23.000000
50%      26.000000
75%      30.000000
max      42.000000
Name: Penjualan, dtype: float64
Unique Day:  ['Monday']

6.4.3 Data Selection in One Line

In [51]:
data_filter[data_filter.Hari == 'Monday'].head() # Same as 6.4.1
Out[51]:
Bulan Hari Suhu Rainfall Leaflet Harga Penjualan Pendapatan
Tanggal
2017-01-02 January Monday 0.157175 1.33 15 0.3 13 3.9
2017-01-09 January Monday 0.261959 1.18 20 0.3 17 5.1
2017-01-16 January Monday 0.176538 1.67 24 0.3 12 3.6
2017-01-23 January Monday 0.261959 1.05 21 0.3 17 5.1
2017-01-30 January Monday 0.296128 1.05 20 0.3 17 5.1
In [52]:
data_filter[(data_filter.Hari == 'Monday') & (data_filter.Penjualan > 15)].head() # Same as 6.4.2
Out[52]:
Bulan Hari Suhu Rainfall Leaflet Harga Penjualan Pendapatan
Tanggal
2017-01-09 January Monday 0.261959 1.18 20 0.3 17 5.1
2017-01-23 January Monday 0.261959 1.05 21 0.3 17 5.1
2017-01-30 January Monday 0.296128 1.05 20 0.3 17 5.1
2017-02-06 February Monday 0.340547 0.95 28 0.3 20 6.0
2017-02-13 February Monday 0.356492 1.11 34 0.3 18 5.4
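
The same one-line filters can also be written with DataFrame.query, which some find more readable when several conditions are combined:

# Equivalent filter written as a query expression
data_filter.query("Hari == 'Monday' and Penjualan > 15").head()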

6.4.4 Between Method

In [53]:
# Let's say we want all days with sales between 15 and 30 inclusive
sales_index_manual = (data_filter.Penjualan >= 15) & (data_filter.Penjualan <= 30)
data_filter[sales_index_manual].head()
Out[53]:
Bulan Hari Suhu Rainfall Leaflet Harga Penjualan Pendapatan
Tanggal
2017-01-03 January Tuesday 0.220957 1.33 27 0.3 15 4.5
2017-01-04 January Wednesday 0.330296 1.05 28 0.3 17 5.1
2017-01-05 January Thursday 0.310934 1.00 33 0.3 18 5.4
2017-01-08 January Sunday 0.255125 1.18 28 0.3 15 4.5
2017-01-09 January Monday 0.261959 1.18 20 0.3 17 5.1
In [54]:
data_filter[sales_index_manual].info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 259 entries, 2017-01-03 to 2017-12-29
Data columns (total 8 columns):
Bulan         259 non-null object
Hari          259 non-null object
Suhu          259 non-null float64
Rainfall      259 non-null float64
Leaflet       259 non-null int64
Harga         259 non-null float64
Penjualan     259 non-null int64
Pendapatan    259 non-null float64
dtypes: float64(4), int64(2), object(2)
memory usage: 18.2+ KB
In [55]:
# With the 'between' method
sales_index_between = data_filter.Penjualan.between(15,30)
data_filter[sales_index_between].head()
Out[55]:
Bulan Hari Suhu Rainfall Leaflet Harga Penjualan Pendapatan
Tanggal
2017-01-03 January Tuesday 0.220957 1.33 27 0.3 15 4.5
2017-01-04 January Wednesday 0.330296 1.05 28 0.3 17 5.1
2017-01-05 January Thursday 0.310934 1.00 33 0.3 18 5.4
2017-01-08 January Sunday 0.255125 1.18 28 0.3 15 4.5
2017-01-09 January Monday 0.261959 1.18 20 0.3 17 5.1
In [56]:
data_filter[sales_index_between].info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 259 entries, 2017-01-03 to 2017-12-29
Data columns (total 8 columns):
Bulan         259 non-null object
Hari          259 non-null object
Suhu          259 non-null float64
Rainfall      259 non-null float64
Leaflet       259 non-null int64
Harga         259 non-null float64
Penjualan     259 non-null int64
Pendapatan    259 non-null float64
dtypes: float64(4), int64(2), object(2)
memory usage: 18.2+ KB

7 Export the Data to a CSV File

In [71]:
data_std.to_csv('Standardization Data.csv',index = False,header = True)
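
A quick round-trip check that the file was written as expected, assuming the working directory is unchanged and using the same filename as above:

# Read the exported file back to confirm the write succeeded
pd.read_csv('Standardization Data.csv').head()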